<!DOCTYPE HTML>
<html lang="en" >
    
    <head>
        <!-- Document metadata. The encoding is declared once, via the
             charset attribute; the HTML Standard does not allow a second,
             http-equiv based declaration next to it. -->
        <meta charset="UTF-8">
        <meta http-equiv="X-UA-Compatible" content="IE=edge">
        <title>xpath爬取前程无忧的数据 | 爬虫</title>
        <meta name="description" content="">
        <meta name="generator" content="GitBook 2.6.7">

        <!-- Mobile / home-screen hints. The viewport no longer sets
             user-scalable=no, so pinch-zoom stays available. -->
        <meta name="HandheldFriendly" content="true">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <meta name="apple-mobile-web-app-capable" content="yes">
        <meta name="apple-mobile-web-app-status-bar-style" content="black">
        <link rel="apple-touch-icon-precomposed" sizes="152x152" href="../gitbook/images/apple-touch-icon-precomposed-152.png">
        <link rel="shortcut icon" href="../gitbook/images/favicon.ico" type="image/x-icon">

        <!-- Stylesheets: the base GitBook sheet first, then the plugin
             sheets (highlight, search, fontsettings) in their original order. -->
        <link rel="stylesheet" href="../gitbook/style.css">
        <link rel="stylesheet" href="../gitbook/plugins/gitbook-plugin-highlight/website.css">
        <link rel="stylesheet" href="../gitbook/plugins/gitbook-plugin-search/search.css">
        <link rel="stylesheet" href="../gitbook/plugins/gitbook-plugin-fontsettings/website.css">

        <!-- Sequential navigation: next and previous pages. -->
        <link rel="next" href="../spider_projects/存储数据库的脚本.html">
        <link rel="prev" href="../spider_projects/使用代理在普通爬虫脚本下.html">
    </head>
    <body>
        
        
    <div class="book"
        data-level="10.2"
        data-chapter-title="xpath爬取前程无忧的数据"
        data-filepath="spider_projects/xpath爬取前程无忧的数据.md"
        data-basepath=".."
        data-revision="Fri Oct 19 2018 08:44:08 GMT+0800 (中国标准时间)"
        data-innerlanguage="">
    

<div class="book-summary">
    <nav role="navigation">
        <ul class="summary">
            
            
            
            

            

            
    
        <!-- Sidebar entry for the unnumbered level-0 page (book index). -->
        <li class="chapter " data-level="0" data-path="index.html">
            <a href="../index.html">
                <i class="fa fa-check"></i>
                序言
            </a>
        </li>
    
        <!-- Sidebar entry for chapter 1 and its sub-articles 1.1 to 1.3. -->
        <li class="chapter " data-level="1" data-path="认识爬虫/introduceSpider.html">
            <a href="../认识爬虫/introduceSpider.html">
                <i class="fa fa-check"></i>
                <b>1.</b>
                认识爬虫
            </a>
            <ul class="articles">
                <li class="chapter " data-level="1.1" data-path="认识爬虫/http.html">
                    <a href="../认识爬虫/http.html">
                        <i class="fa fa-check"></i>
                        <b>1.1.</b>
                        HTTP
                    </a>
                </li>
                <li class="chapter " data-level="1.2" data-path="认识爬虫/Requests.html">
                    <a href="../认识爬虫/Requests.html">
                        <i class="fa fa-check"></i>
                        <b>1.2.</b>
                        requests
                    </a>
                </li>
                <li class="chapter " data-level="1.3" data-path="认识爬虫/初步爬虫.html">
                    <a href="../认识爬虫/初步爬虫.html">
                        <i class="fa fa-check"></i>
                        <b>1.3.</b>
                        初步爬虫小项目
                    </a>
                </li>
            </ul>
        </li>
    
        <!-- Sidebar entry for chapter 2 and its sub-articles 2.1 to 2.6. -->
        <li class="chapter " data-level="2" data-path="正则表达式/正则表达式.html">
            <a href="../正则表达式/正则表达式.html">
                <i class="fa fa-check"></i>
                <b>2.</b>
                正则表达式
            </a>
            <ul class="articles">
                <li class="chapter " data-level="2.1" data-path="正则表达式/正则表达式练习.html">
                    <a href="../正则表达式/正则表达式练习.html">
                        <i class="fa fa-check"></i>
                        <b>2.1.</b>
                        正则表达式练习
                    </a>
                </li>
                <li class="chapter " data-level="2.2" data-path="正则表达式/正则表达式之后的第一个项目.html">
                    <a href="../正则表达式/正则表达式之后的第一个项目.html">
                        <i class="fa fa-check"></i>
                        <b>2.2.</b>
                        正则表达式之后的第一个项目
                    </a>
                </li>
                <li class="chapter " data-level="2.3" data-path="正则表达式/正则表达式常用表.html">
                    <a href="../正则表达式/正则表达式常用表.html">
                        <i class="fa fa-check"></i>
                        <b>2.3.</b>
                        正则表达式常用表
                    </a>
                </li>
                <li class="chapter " data-level="2.4" data-path="正则表达式/正则表达式重点1.html">
                    <a href="../正则表达式/正则表达式重点1.html">
                        <i class="fa fa-check"></i>
                        <b>2.4.</b>
                        正则表达式重点
                    </a>
                </li>
                <li class="chapter " data-level="2.5" data-path="正则表达式/正则表达式常问问题.html">
                    <a href="../正则表达式/正则表达式常问问题.html">
                        <i class="fa fa-check"></i>
                        <b>2.5.</b>
                        正则表达式常问问题
                    </a>
                </li>
                <li class="chapter " data-level="2.6" data-path="正则表达式/正则爬虫练习.html">
                    <a href="../正则表达式/正则爬虫练习.html">
                        <i class="fa fa-check"></i>
                        <b>2.6.</b>
                        正则爬虫项目练习
                    </a>
                </li>
            </ul>
        </li>
    
        <!-- Sidebar entry for chapter 3 and its sub-articles 3.1 to 3.3. -->
        <li class="chapter " data-level="3" data-path="XPATH/xpath-in.html">
            <a href="../XPATH/xpath-in.html">
                <i class="fa fa-check"></i>
                <b>3.</b>
                XPATH
            </a>
            <ul class="articles">
                <li class="chapter " data-level="3.1" data-path="XPATH/Xpath.html">
                    <a href="../XPATH/Xpath.html">
                        <i class="fa fa-check"></i>
                        <b>3.1.</b>
                        XPATH认识
                    </a>
                </li>
                <li class="chapter " data-level="3.2" data-path="XPATH/Xpath的代码例子.html">
                    <a href="../XPATH/Xpath的代码例子.html">
                        <i class="fa fa-check"></i>
                        <b>3.2.</b>
                        XPATH的代码例子
                    </a>
                </li>
                <li class="chapter " data-level="3.3" data-path="XPATH/xpath的爬虫练习.html">
                    <a href="../XPATH/xpath的爬虫练习.html">
                        <i class="fa fa-check"></i>
                        <b>3.3.</b>
                        XPATH的爬虫练习
                    </a>
                </li>
            </ul>
        </li>
    
        <!-- Sidebar entry for chapter 4 and its sub-articles 4.1 and 4.2. -->
        <li class="chapter " data-level="4" data-path="BeautifulSoup/BeautifulSoup.html">
            <a href="../BeautifulSoup/BeautifulSoup.html">
                <i class="fa fa-check"></i>
                <b>4.</b>
                BeautifulSoup
            </a>
            <ul class="articles">
                <li class="chapter " data-level="4.1" data-path="BeautifulSoup/常用的css选择器.html">
                    <a href="../BeautifulSoup/常用的css选择器.html">
                        <i class="fa fa-check"></i>
                        <b>4.1.</b>
                        常用的css选择器
                    </a>
                </li>
                <li class="chapter " data-level="4.2" data-path="BeautifulSoup/BeautifulSoup4的各种例子.html">
                    <a href="../BeautifulSoup/BeautifulSoup4的各种例子.html">
                        <i class="fa fa-check"></i>
                        <b>4.2.</b>
                        BeautifulSoup4的各种例子
                    </a>
                </li>
            </ul>
        </li>
    
        <!-- Sidebar entry for chapter 5 (no sub-articles). -->
        <li class="chapter " data-level="5" data-path="代理/proxy.html">
            <a href="../代理/proxy.html">
                <i class="fa fa-check"></i>
                <b>5.</b>
                ip代理池项目
            </a>
        </li>
    
        <!-- Sidebar entry for chapter 6 (no sub-articles). -->
        <li class="chapter " data-level="6" data-path="Selenium/Selenium.html">
            <a href="../Selenium/Selenium.html">
                <i class="fa fa-check"></i>
                <b>6.</b>
                Selenium
            </a>
        </li>
    
        <!-- Sidebar entry for chapter 7 and its sub-articles 7.1 to 7.5. -->
        <li class="chapter " data-level="7" data-path="进程线程协程/introduce.html">
            <a href="../进程线程协程/introduce.html">
                <i class="fa fa-check"></i>
                <b>7.</b>
                进程线程协程
            </a>
            <ul class="articles">
                <li class="chapter " data-level="7.1" data-path="进程线程协程/gevent.html">
                    <a href="../进程线程协程/gevent.html">
                        <i class="fa fa-check"></i>
                        <b>7.1.</b>
                        gevent
                    </a>
                </li>
                <li class="chapter " data-level="7.2" data-path="进程线程协程/green_let.html">
                    <a href="../进程线程协程/green_let.html">
                        <i class="fa fa-check"></i>
                        <b>7.2.</b>
                        green_let
                    </a>
                </li>
                <li class="chapter " data-level="7.3" data-path="进程线程协程/yield.html">
                    <a href="../进程线程协程/yield.html">
                        <i class="fa fa-check"></i>
                        <b>7.3.</b>
                        yield
                    </a>
                </li>
                <li class="chapter " data-level="7.4" data-path="进程线程协程/multiprecessing.html">
                    <a href="../进程线程协程/multiprecessing.html">
                        <i class="fa fa-check"></i>
                        <b>7.4.</b>
                        multiprecessing
                    </a>
                </li>
                <li class="chapter " data-level="7.5" data-path="进程线程协程/threading.html">
                    <a href="../进程线程协程/threading.html">
                        <i class="fa fa-check"></i>
                        <b>7.5.</b>
                        threading
                    </a>
                </li>
            </ul>
        </li>
    
        <!-- Sidebar entry for chapter 8 and its sub-articles 8.1 to 8.3. -->
        <li class="chapter " data-level="8" data-path="scrapy框架/scrapy.html">
            <a href="../scrapy框架/scrapy.html">
                <i class="fa fa-check"></i>
                <b>8.</b>
                Scrapy
            </a>
            <ul class="articles">
                <li class="chapter " data-level="8.1" data-path="scrapy框架/scrapy_setting.html">
                    <a href="../scrapy框架/scrapy_setting.html">
                        <i class="fa fa-check"></i>
                        <b>8.1.</b>
                        scrapy_setting
                    </a>
                </li>
                <li class="chapter " data-level="8.2" data-path="scrapy框架/模块作用.html">
                    <a href="../scrapy框架/模块作用.html">
                        <i class="fa fa-check"></i>
                        <b>8.2.</b>
                        模块作用
                    </a>
                </li>
                <li class="chapter " data-level="8.3" data-path="scrapy框架/19个中间件.html">
                    <a href="../scrapy框架/19个中间件.html">
                        <i class="fa fa-check"></i>
                        <b>8.3.</b>
                        19个中间件
                    </a>
                </li>
            </ul>
        </li>
    
        <!-- Sidebar entry for chapter 9 and its single sub-article 9.1. -->
        <li class="chapter " data-level="9" data-path="scrapy-redis分布式/scrapy-redis.html">
            <a href="../scrapy-redis分布式/scrapy-redis.html">
                <i class="fa fa-check"></i>
                <b>9.</b>
                scrapy-redis分布式
            </a>
            <ul class="articles">
                <li class="chapter " data-level="9.1" data-path="scrapy-redis分布式/scrapy-redis的改造方法.html">
                    <a href="../scrapy-redis分布式/scrapy-redis的改造方法.html">
                        <i class="fa fa-check"></i>
                        <b>9.1.</b>
                        scrapy-redis的改造方法
                    </a>
                </li>
            </ul>
        </li>
    
        <!-- Sidebar entry for chapter 10 and its sub-articles 10.1 to 10.12.
             Entry 10.2 is the page being displayed: besides the "active"
             class it carries aria-current="page" on its link, so the current
             location is exposed programmatically and not only through styling. -->
        <li class="chapter " data-level="10" data-path="spider_projects/introduce.html">
            <a href="../spider_projects/introduce.html">
                <i class="fa fa-check"></i>
                <b>10.</b>
                spider_projects
            </a>
            <ul class="articles">
                <li class="chapter " data-level="10.1" data-path="spider_projects/使用代理在普通爬虫脚本下.html">
                    <a href="../spider_projects/使用代理在普通爬虫脚本下.html">
                        <i class="fa fa-check"></i>
                        <b>10.1.</b>
                        使用代理在普通爬虫脚本下
                    </a>
                </li>
                <li class="chapter active" data-level="10.2" data-path="spider_projects/xpath爬取前程无忧的数据.html">
                    <a href="../spider_projects/xpath爬取前程无忧的数据.html" aria-current="page">
                        <i class="fa fa-check"></i>
                        <b>10.2.</b>
                        xpath爬取前程无忧的数据
                    </a>
                </li>
                <li class="chapter " data-level="10.3" data-path="spider_projects/存储数据库的脚本.html">
                    <a href="../spider_projects/存储数据库的脚本.html">
                        <i class="fa fa-check"></i>
                        <b>10.3.</b>
                        存储数据库的脚本
                    </a>
                </li>
                <li class="chapter " data-level="10.4" data-path="spider_projects/weibo.html">
                    <a href="../spider_projects/weibo.html">
                        <i class="fa fa-check"></i>
                        <b>10.4.</b>
                        爬取央视新闻微博
                    </a>
                </li>
                <li class="chapter " data-level="10.5" data-path="spider_projects/weibo2.html">
                    <a href="../spider_projects/weibo2.html">
                        <i class="fa fa-check"></i>
                        <b>10.5.</b>
                        爬取明星的微博
                    </a>
                </li>
                <li class="chapter " data-level="10.6" data-path="spider_projects/liepin.html">
                    <a href="../spider_projects/liepin.html">
                        <i class="fa fa-check"></i>
                        <b>10.6.</b>
                        爬取猎聘的职位信息Crawl
                    </a>
                </li>
                <li class="chapter " data-level="10.7" data-path="spider_projects/zhilianRe.html">
                    <a href="../spider_projects/zhilianRe.html">
                        <i class="fa fa-check"></i>
                        <b>10.7.</b>
                        纯正则爬取智联的职位信息
                    </a>
                </li>
                <li class="chapter " data-level="10.8" data-path="spider_projects/setting.html">
                    <a href="../spider_projects/setting.html">
                        <i class="fa fa-check"></i>
                        <b>10.8.</b>
                        scrapy中的setting
                    </a>
                </li>
                <li class="chapter " data-level="10.9" data-path="spider_projects/items.html">
                    <a href="../spider_projects/items.html">
                        <i class="fa fa-check"></i>
                        <b>10.9.</b>
                        scrapy中的items
                    </a>
                </li>
                <li class="chapter " data-level="10.10" data-path="spider_projects/pipeline.html">
                    <a href="../spider_projects/pipeline.html">
                        <i class="fa fa-check"></i>
                        <b>10.10.</b>
                        scrapy中的pipeline
                    </a>
                </li>
                <li class="chapter " data-level="10.11" data-path="spider_projects/middlewares.html">
                    <a href="../spider_projects/middlewares.html">
                        <i class="fa fa-check"></i>
                        <b>10.11.</b>
                        scrapy中的middlewares
                    </a>
                </li>
                <li class="chapter " data-level="10.12" data-path="spider_projects/框架中的代码运行脚本.html">
                    <a href="../spider_projects/框架中的代码运行脚本.html">
                        <i class="fa fa-check"></i>
                        <b>10.12.</b>
                        框架中的代码运行脚本
                    </a>
                </li>
            </ul>
        </li>
    
        <!-- Sidebar entry for chapter 11 (no sub-articles). -->
        <li class="chapter " data-level="11" data-path="爬虫的面试题/面试题1-10.html">
            <a href="../爬虫的面试题/面试题1-10.html">
                <i class="fa fa-check"></i>
                <b>11.</b>
                爬虫面试题
            </a>
        </li>
    


            
            <li class="divider"></li>
            <!-- External attribution link. "_blank" (with the underscore) is
                 the keyword for a new browsing context; plain "blank" would
                 instead target a window named "blank". rel="noopener noreferrer"
                 keeps the external page from reaching back through window.opener. -->
            <li>
                <a href="https://www.gitbook.com" target="_blank" rel="noopener noreferrer" class="gitbook-link">
                    Published with GitBook
                </a>
            </li>
            
        </ul>
    </nav>
</div>

    <div class="book-body">
        <div class="body-inner">
            <div class="book-header" role="navigation">
                <!-- Slot for left-hand actions (empty in this static markup). -->

                <!-- Book title, linking to the book root; preceded by a spinner icon. -->
                <h1>
                    <i class="fa fa-circle-o-notch fa-spin"></i>
                    <a href="../">爬虫</a>
                </h1>
            </div>

            <div class="page-wrapper" tabindex="-1" role="main">
                <div class="page-inner">
                
                
                    <section class="normal" id="section-">
                    
                        <h1 id="&#x722C;&#x53D6;&#x524D;&#x7A0B;&#x65E0;&#x5FE7;&#x7684;&#x6570;&#x636E;">&#x722C;&#x53D6;&#x524D;&#x7A0B;&#x65E0;&#x5FE7;&#x7684;&#x6570;&#x636E;</h1>
<pre><code class="lang-python">
<span class="hljs-keyword">import</span> requests
<span class="hljs-keyword">from</span> lxml <span class="hljs-keyword">import</span> etree
<span class="hljs-keyword">from</span> spider_projects.mysqlhelper <span class="hljs-keyword">import</span> MysqlHelper
<span class="hljs-keyword">from</span> urllib <span class="hljs-keyword">import</span> parse
<span class="hljs-keyword">import</span> time
<span class="hljs-keyword">import</span> re

<span class="hljs-comment">#&#x5C01;&#x88C5;&#x4E00;&#x4E2A;&#x51FD;&#x6570;&#xFF0C;&#x7528;&#x6765;&#x83B7;&#x53D6;&#x6570;&#x636E;&#xFF0C;&#x5224;&#x65AD;&#x6570;&#x636E;&#x662F;&#x5426;&#x5B58;&#x5728;</span>
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">Decide</span><span class="hljs-params">(element)</span>:</span>
    <span class="hljs-keyword">if</span> element:
        element=element[<span class="hljs-number">0</span>]
    <span class="hljs-keyword">else</span>:
        element = <span class="hljs-string">&quot; &quot;</span>
    <span class="hljs-keyword">return</span> element

helper = MysqlHelper()
base_url = <span class="hljs-string">&apos;https://search.51job.com/list/010000,000000,0000,00,9,99,java,2,{}.html?lang=c&amp;stype=1&amp;postchannel=0000&amp;workyear=99&amp;cotype=99&amp;degreefrom=99&amp;jobterm=99&amp;companysize=99&amp;lonlat=0%2C0&amp;radius=-1&amp;ord_field=0&amp;confirmdate=9&amp;fromType=&amp;dibiaoid=0&amp;address=&amp;line=&amp;specialarea=00&amp;from=&amp;welfare=&apos;</span>
<span class="hljs-comment">#&#x8BF7;&#x6C42;&#x5934;</span>
headers ={
    <span class="hljs-string">&apos;Accept&apos;</span>: <span class="hljs-string">&apos;text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8&apos;</span>,
    <span class="hljs-comment"># &apos;Accept-Encoding&apos;: &apos;gzip, deflate, br&apos;,</span>
    <span class="hljs-string">&apos;Accept-Language&apos;</span>: <span class="hljs-string">&apos;zh-CN,zh;q=0.9&apos;</span>,
    <span class="hljs-string">&apos;Cache-Control&apos;</span>: <span class="hljs-string">&apos;max-age=0&apos;</span>,
    <span class="hljs-string">&apos;Connection&apos;</span>: <span class="hljs-string">&apos;keep-alive&apos;</span>,
    <span class="hljs-string">&apos;Cookie&apos;</span>: <span class="hljs-string">&apos;guid=5e80d2479dd198d859c503e40a9bb4aa; nsearch=jobarea%3D%26%7C%26ord_field%3D%26%7C%26recentSearch0%3D%26%7C%26recentSearch1%3D%26%7C%26recentSearch2%3D%26%7C%26recentSearch3%3D%26%7C%26recentSearch4%3D%26%7C%26collapse_expansion%3D; adv=adsnew%3D1%26%7C%26adsresume%3D1%26%7C%26adsfrom%3Dhttps%253A%252F%252Fwww.baidu.com%252Fbaidu.php%253Fsc.0s0000KlmKuCTsdU2Ie5cilLfWfMcG39IbDBuBRmslDhKajt6Pm5oVUB1Ia9033A5pXt9nRrTgkwp7zGOCKF2kdJaUGKRm49shunfiu72wdpjZgt_0KeNOvYBbGRCryT1Od126mgTluhnUIYPYJu1fRZk46fKXbx2GwPI0nh2EoerGRzAf.7Y_NR2Ar5Od66CHnsGtVdXNdlc2D1n2xx81IZ76Y_NtX5W3eS1J1-k_nOEOlecxLO3MHSEwECntx135zOCxgvg45E6OeAHxfOgkOdkxo3O-CLOWEWEzgxwOsr5Oml5dlpoQOvSajSw7OVxwxS9yOO_xVYXveqElqEgVmvfdG_H3en-dvHFIjxvuQVOB-MFb8lRq5uEtN2s1f_IhOF_L2.U1Yk0ZDqkea11neXYtxPS0KspynqnfKY5TXs_t1rLV5Z1x60pyYqnW0Y0ATqmhNsT100Iybqmh7GuZR0TA-b5HD0mv-b5H00UgfqnH0krNtknjDLg1c4rH-xn1msnfKopHYs0ZFY5HmvPsK-pyfqnWmdnWwxnHfzndtzPWbvP7tznHDsn7tkrjRvn7tzPWndn7tznWDdrfKBpHYznjf0UynqnH0snNtLrjm3nH6zPjNxn10vnWnLnHfsP7ts0Z7spyfqn0Kkmv-b5H00ThIYmyTqn0K9mWYsg100ugFM5H00TZ0qn0K8IM0qna3snj0snj0sn0KVIZ0qn0KbuAqs5H00ThCqnfKbugmqTAn0uMfqn0KspjYs0Aq15H00mMTqnH00UMfqn0K1XWY0IZN15HD1n161nj61PWcdn1n3rjcvPWmz0ZF-TgfqnHR1P1f3rHcYrj6dP6K1pyfqmHN-PANbmH6snj04nAN9n6KWTvYqnRRsnWIKfHPKP16knYcdnfK9m1Yk0ZK85H00TydY5H00Tyd15H00XMfqn0KVmdqhThqV5HKxn7tsg100uA78IyF-gLK_my4GuZnqn7tsg1Kxn0Ksmgwxuhk9u1Ys0AwWpyfqn0K-IA-b5iYk0A71TAPW5H00IgKGUhPW5H00Tydh5H00uhPdIjYs0AulpjYs0Au9IjYs0ZGsUZN15H00mywhUA7M5HD0UAuW5H00mLFW5HfdP1m3%2526ck%253D1583.4.110.165.538.610.616.297%2526shh%253Dwww.baidu.com%2526sht%253Dbaidu%2526us%253D1.0.1.0.1.303.0%2526ie%253Dutf-8%2526f%253D8%2526tn%253Dbaidu%2526wd%253D%2525E5%252589%25258D%2525E7%2525A8%25258B%2525E6%252597%2525A0%2525E5%2525BF%2525A7%2525E7%2525BD%252591%2526rqlang%253Dcn%2526inputT%253D4178%2526bc%253D110101%26%7C%26adsnum%3D1337794; partner=www_baidu_com; 51job=cenglish%3D0%26%7C%26; 
search=jobarea%7E%60010000%7C%21ord_field%7E%600%7C%21recentSearch0%7E%601%A1%FB%A1%FA010000%2C00%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FAJava+%BF%AA%B7%A2%A1%FB%A1%FA2%A1%FB%A1%FA%A1%FB%A1%FA-1%A1%FB%A1%FA1537489571%A1%FB%A1%FA0%A1%FB%A1%FA%A1%FB%A1%FA%7C%21recentSearch1%7E%601%A1%FB%A1%FA010000%2C00%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FAjava%A1%FB%A1%FA2%A1%FB%A1%FA%A1%FB%A1%FA-1%A1%FB%A1%FA1537455456%A1%FB%A1%FA0%A1%FB%A1%FA%A1%FB%A1%FA%7C%21&apos;</span>,
    <span class="hljs-string">&apos;Host&apos;</span>: <span class="hljs-string">&apos;search.51job.com&apos;</span>,
    <span class="hljs-string">&apos;Referer&apos;</span>: <span class="hljs-string">&apos;https://www.51job.com/&apos;</span>,
    <span class="hljs-string">&apos;Upgrade-Insecure-Requests&apos;</span>: <span class="hljs-string">&apos;1&apos;</span>,
    <span class="hljs-string">&apos;User-Agent&apos;</span>: <span class="hljs-string">&apos;Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36&apos;</span>
}
<span class="hljs-comment">#269  1-269</span>
<span class="hljs-comment">#&#x5BF9;&#x4E8E;&#x9875;&#x7801;&#x7684;&#x83B7;&#x53D6;</span>
<span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> range(<span class="hljs-number">1</span>,<span class="hljs-number">266</span>):
    url = base_url.format(i)
    print(<span class="hljs-string">&quot;----------------------------&#x7B2C;&quot;</span>,i,<span class="hljs-string">&quot;&#x9875;------------------------------------&quot;</span>)
    response = requests.get(url,headers=headers)
    html_ele = etree.HTML(response.text)
    a_eles_list = html_ele.xpath(<span class="hljs-string">&apos;//div[@class=&quot;dw_table&quot;]/div[position()&gt;3]/p/span/a/@href&apos;</span>)
    <span class="hljs-comment"># &#x5FAA;&#x73AF;&#x8BBF;&#x95EE;&#x8FD9;&#x4E9B;a&#x6807;&#x7B7E;</span>
    <span class="hljs-keyword">for</span> href <span class="hljs-keyword">in</span> a_eles_list:
        <span class="hljs-comment"># Fetch the detail page behind each listing link and parse it for XPath queries.</span>
        response = requests.get(href,headers=headers)
        html_ele = etree.HTML(response.text)
        <span class="hljs-keyword">try</span>:
            <span class="hljs-comment"># Job title, taken from the title attribute of the h1 on the detail page.</span>
            title = html_ele.xpath(<span class="hljs-string">&apos;//h1/@title&apos;</span>)
            title = Decide(title)
            <span class="hljs-comment"># Salary text.</span>
            salary = html_ele.xpath(<span class="hljs-string">&apos;//div[@class=&quot;cn&quot;]/strong/text()&apos;</span>)
            salary = Decide(salary)
            <span class="hljs-comment"># Text nodes holding location, experience, education, headcount and posting date.</span>
            info = html_ele.xpath(<span class="hljs-string">&apos;//div[@class=&quot;cn&quot;]/p[2]/text()&apos;</span>)
            <span class="hljs-comment"># The list is queried once and indexed; the blank padding makes a missing field</span>
            <span class="hljs-comment"># come out as &quot; &quot; instead of raising IndexError and losing the whole record.</span>
            fields = [text.strip() <span class="hljs-keyword">for</span> text <span class="hljs-keyword">in</span> info] + [<span class="hljs-string">&quot; &quot;</span>] * <span class="hljs-number">5</span>
            position = Decide(info).strip()
            experince = fields[<span class="hljs-number">1</span>]
            <span class="hljs-comment"># 51job uses two layouts: with more than four entries the education field is present.</span>
            <span class="hljs-keyword">if</span> len(info) &gt; <span class="hljs-number">4</span>:
                education = fields[<span class="hljs-number">2</span>]
                number = fields[<span class="hljs-number">3</span>]
                time_show = fields[<span class="hljs-number">4</span>]
            <span class="hljs-keyword">else</span>:
                <span class="hljs-comment"># The shorter layout has no education entry, so the later fields shift up by one.</span>
                education = <span class="hljs-string">&quot; &quot;</span>
                number = fields[<span class="hljs-number">2</span>]
                time_show = fields[<span class="hljs-number">3</span>]
            <span class="hljs-comment"># Heading of the job-description section.</span>
            job_title = html_ele.xpath(<span class="hljs-string">&apos;//div[@class=&quot;bmsg job_msg inbox&quot;]/p/strong//text()&apos;</span>)
            job_title = Decide(job_title).strip()
            <span class="hljs-comment"># Job-description text.</span>
            job_info =html_ele.xpath(<span class="hljs-string">&apos;//div[@class=&quot;bmsg job_msg inbox&quot;]/p//text()&apos;</span>)
            <span class="hljs-comment"># The description also comes in two layouts; fall back to the second one</span>
            <span class="hljs-comment"># when the first query finds nothing.</span>
            <span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> job_info:
                job_title = html_ele.xpath(<span class="hljs-string">&apos;//h2/span[@class=&quot;bname&quot;]/text()&apos;</span>)
                job_title = Decide(job_title).strip()
                job_info = html_ele.xpath(<span class="hljs-string">&apos;//div[@class=&quot;bmsg job_msg inbox&quot;]//text()&apos;</span>)
            <span class="hljs-comment"># Remove every whitespace character from the description text.</span>
            job_info = <span class="hljs-string">&apos;&apos;</span>.join(re.findall(<span class="hljs-string">r&apos;\S&apos;</span>, <span class="hljs-string">&apos;&apos;</span>.join(job_info)))
            description = job_title + job_info
            <span class="hljs-comment"># Company name. It is stored for both layouts, so it is read here rather than</span>
            <span class="hljs-comment"># inside the fallback branch, where it was left unset for first-layout pages.</span>
            com_name = html_ele.xpath(<span class="hljs-string">&apos;//div[@class =&quot;com_msg&quot;]/a/p/text()&apos;</span>)[<span class="hljs-number">0</span>]
            <span class="hljs-comment"># Time at which this record was scraped.</span>
            add_time = time.strftime(<span class="hljs-string">&quot;%Y/%m/%d %H:%M:%S&quot;</span>, time.localtime())
            <span class="hljs-comment"># Site the record came from.</span>
            from_web = <span class="hljs-string">&quot;jobs.51job.com&#x524D;&#x7A0B;&#x65E0;&#x5FE7;&quot;</span>
            data = (title, salary, position,experince,education,number,time_show, description,com_name,add_time,from_web)
            print(data)
            print(<span class="hljs-string">&quot;****&quot;</span>*<span class="hljs-number">30</span>)
            <span class="hljs-comment"># Store the record in MySQL.</span>
            insert_sql = <span class="hljs-string">&apos;INSERT INTO job_zhaopin(title, salary, position,experince,education,number,time_show, description,com_name,add_time,from_web) VALUES (%s, %s, %s, %s,%s,%s, %s, %s, %s,%s, %s)&apos;</span>
            helper.execute_modify_sql(insert_sql, data)
        <span class="hljs-keyword">except</span> Exception <span class="hljs-keyword">as</span> e:
            <span class="hljs-comment"># Best effort: report the page that failed and carry on with the next listing.</span>
            print(<span class="hljs-string">&quot;skipped&quot;</span>, href, e)
</code></pre>

                    
                    </section>
                
                
                </div>
            </div>
        </div>

        
        <a href="../spider_projects/使用代理在普通爬虫脚本下.html" class="navigation navigation-prev " aria-label="Previous page: 使用代理在普通爬虫脚本下"><i class="fa fa-angle-left"></i></a>
        
        
        <a href="../spider_projects/存储数据库的脚本.html" class="navigation navigation-next " aria-label="Next page: 存储数据库的脚本"><i class="fa fa-angle-right"></i></a>
        
    </div>
</div>

        
<script src="../gitbook/app.js"></script>

    
    <script src="../gitbook/plugins/gitbook-plugin-search/lunr.min.js"></script>
    

    
    <script src="../gitbook/plugins/gitbook-plugin-search/search.js"></script>
    

    
    <script src="../gitbook/plugins/gitbook-plugin-sharing/buttons.js"></script>
    

    
    <script src="../gitbook/plugins/gitbook-plugin-fontsettings/buttons.js"></script>
    

<script>
// Start the GitBook client once require() has resolved the "gitbook" module,
// handing it the per-plugin settings for this page.
require(["gitbook"], function(gitbook) {
    gitbook.start({
        "highlight": {},
        "search": {
            "maxIndexSize": 1000000
        },
        "sharing": {
            "facebook": true,
            "twitter": true,
            "google": false,
            "weibo": false,
            "instapaper": false,
            "vk": false,
            "all": ["facebook", "google", "twitter", "weibo", "instapaper"]
        },
        "fontsettings": {
            "theme": "white",
            "family": "sans",
            "size": 2
        }
    });
});
</script>

        
    </body>
    
</html>
